import glob
import os

import pandas as pd
from tqdm import tqdm

excel_path = "/media/hsmy/16TB/20240724GIST/sm.xlsx"
wsi_folder = "/media/hsmy/16TB/20240724GIST/GIST南方医"

# File extensions that count as a whole-slide image for this check.
WSI_EXTENSIONS = ('.svs', '.ndpi')


def normalize_key(value) -> str:
    """Return the text used to match *value* against slide file names.

    pandas infers float64 for a numeric column that contains blank cells,
    so an ID typed as ``123`` can arrive here as ``123.0``. Stringifying
    that directly would yield ``"123.0"``, which never prefixes a file
    called ``123.svs``; integral floats are therefore rendered without
    the fractional part. Every other value is passed through ``str``.
    """
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def slide_matches(name, prefix, extensions=WSI_EXTENSIONS) -> bool:
    """Return True if *name* is ``<prefix><anything><ext>`` for an allowed ext.

    This is the literal equivalent of the glob pattern ``prefix*ext``:
    the prefix and the extension must not overlap inside *name*, and no
    character of *prefix* is treated as a wildcard.
    """
    if not name.startswith(prefix):
        return False
    # Characters left after the prefix; the extension has to fit in there.
    room = len(name) - len(prefix)
    return any(name.endswith(ext) and len(ext) <= room for ext in extensions)


def find_missing_keys(keys, filenames, extensions=WSI_EXTENSIONS) -> list:
    """Return the entries of *keys* that have no matching slide file.

    Args:
        keys: iterable of sample identifiers (strings or numbers).
        filenames: iterable of bare file names found in the slide folder.
        extensions: tuple of accepted file extensions, including the dot.

    Returns:
        The original (un-normalized) keys, in input order, for which no
        name in *filenames* starts with the normalized key and ends with
        one of *extensions*.

    Note:
        Matching is by prefix, so key ``12`` is satisfied by ``123.svs``.
    """
    # Keep only candidate slides once, instead of re-filtering per key.
    slides = [name for name in filenames if name.endswith(extensions)]
    missing = []
    for key in keys:
        prefix = normalize_key(key)
        if not any(slide_matches(name, prefix, extensions) for name in slides):
            missing.append(key)
    return missing


def main():
    """Report sample IDs from the Excel sheet that have no slide on disk.

    Reads the first column of ``excel_path``, drops empty rows, compares
    each ID with the files in ``wsi_folder``, prints the unmatched IDs and
    their count, and writes them to ``miss_sm.csv`` (no index, no header).

    Raises:
        FileNotFoundError: if ``wsi_folder`` does not exist. Failing here
            is deliberate: an unmounted drive would otherwise be reported
            as "every slide is missing".
    """
    df = pd.read_excel(excel_path, usecols=[0])
    df = df.dropna(axis=0, how='any')
    column_data = df.iloc[:, 0].values

    # List the folder once; the per-key work is then pure string matching.
    filenames = os.listdir(wsi_folder)
    lack_wsi_arr = find_missing_keys(tqdm(column_data), filenames)

    print(lack_wsi_arr)
    print(len(lack_wsi_arr))
    pd.DataFrame(lack_wsi_arr).to_csv('miss_sm.csv', index=False, header=False)


if __name__ == "__main__":
    main()
